#########################
# Ethan vanderWilden
# Analysis/Results file
# Stigmatizing the radical right (with Laia Balcells, Sergi Martinez, and Vicente Valentim)

# Last Replicated: June 3, 2025

# DESCRIPTION: Main quantitative analysis file to replicate all numbers, figures, and tables

# OUTPUTS: List figures, tables, etc.
#########################

########## 1. Load in packages, data, and helper functions ####

### 1.1 Packages

#list of packages
pkg = c("tidyverse", "patchwork", "haven", "modelsummary", "Hmisc",
        "collapse", "attritevis", "factoextra", "kableExtra", "ggridges")

#NOTE: in some cases, the "attritevis" package may need to be installed directly from github
if (!("attritevis" %in% rownames(installed.packages()))){
  library(devtools)
  install_github("lbassan/attritevis")}

# Install packages if necessary
if (length(setdiff(pkg, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(pkg, rownames(installed.packages())))}


# Load packages
suppressWarnings(suppressMessages(lapply(pkg, library, character.only = TRUE)))




### 1.2 Clear environment and load data
rm(list=ls()) #clear local environment
setwd("~") #SET TO RELEVANT WORKING DIRECTORY

#load in data with incompletes
df_incompletes <- read_sav("df_incompletesBMVV.sav")

#load in data incompletes removed
data <- read_sav("dataBMVV.sav")


## Alternatively, can run following line to remove incompletes:
#data <- sbt(df_incompletes, quality.check == 7 & duration > 90 & duration < 1800 & Finished == 1)


### 1.3 Load in 'helper functions' file
source("functionsBMVV.R")



########## 2. Data manipulation ####

### 2.1 Flip "acceptability of radical right questions" such that high stigma = high value

#recode
data$first.accept <- 10 - data$first.accept #flip variable (high values = high stigma)
data$media.accept <- 10 - data$media.accept #flip variable (high values = high stigma)
data$peer.accept <- 10 - data$peer.accept #flip variable (high values = high stigma)
data$elite.accept <- 10 - data$elite.accept #flip variable (high values = high stigma)


attr(data$first.accept, "label") <- paste(attr(data$first.accept, "label"), ", REVERSED, high value = high stigma", sep = "")
attr(data$media.accept, "label") <- paste(attr(data$media.accept, "label"), ", REVERSED, high value = high stigma", sep = "")
attr(data$peer.accept, "label") <- paste(attr(data$peer.accept, "label"), ", REVERSED, high value = high stigma", sep = "")
attr(data$elite.accept, "label") <- paste(attr(data$elite.accept, "label"), ", REVERSED, high value = high stigma", sep = "")


### 2.2 Add manually created variables / indices
data$second.accept <- rowMeans(data[, c(24:26)], na.rm = T) #2nd order acceptability - media, peer, elite combination
data$first.sanction.index <- rowMeans(data[,c(27:32), na.rm = T]) #1st order sanctioning (6 items)
data$second.sanction.index <- rowMeans(data[,c(33:38), na.rm = T]) #2nd order sanctioning (6 items)

data$gen.stigma <- (data$first.accept + data$second.accept + data$first.sanction.index + data$second.sanction.index )/ 4


attr(data$second.accept, "label") <- "high values indicate higher perceived second order un-acceptability/stigma"
attr(data$first.sanction.index, "label") <- "high values indicate more willingness to sanction/stigma"
attr(data$second.sanction.index, "label") <- "high values indicate higher expectation that others will sanction/stigma"
attr(data$gen.stigma, "label") <- "index of 4 stigma outcomes (average), high values indicate high stigma against Vox"


########## 3. Main Results ####

### Figures 2 [a and b] (render at 750 x 550 px each)
get_plot(validation_coefs(data, controls = F), mytitle = "A. Second-Order Empirical Stigma")
get_plot(reg_coefs(data, controls = F), mytitle = "B. Generalized Stigma (Indexed)") +
  theme(axis.title.y = element_blank())


### Estimates included in the written section
summary(lm(elite.accept ~ factor(treat), data = sbt(data, data$treat %in% c(0, 3)))) # p = 0.137
round(mean(sbt(data, data$treat == 0 & data$ideology < 5)$gen.stigma, na.rm = T), 2) # 5.28
round(mean(sbt(data, data$treat == 0 & data$ideology >= 5)$gen.stigma, na.rm = T), 2) # 3.65


### Figure 3 [a - d] (Render at 650 x 500)
get_plot(reg_coefs(data, outcome = "first.accept", controls = F), mytitle = "A. First order normative evaluations")
get_plot(reg_coefs(data, outcome = "second.accept", controls = F), mytitle = "B. Second order normative evaluations (indexed)")
get_plot(reg_coefs(data, outcome = "first.sanction.index", controls = F), mytitle = "C. Willingness to sanction (1st order)")
get_plot(reg_coefs(data, outcome = "second.sanction.index", controls = F), mytitle = "D. Sanctioning expectations (2nd order)")

### Figure 4
get_paired(data, controls = F) #render at 750 x 500



########## 4. Appendix ####

#### A.1 Demographic representative assessment ####

#copy dataset for adjustments
repdata <- data

#Get percentages in our data for each variable
repdata$M1 <- data$gender
repdata$AG1 <- ifelse(repdata$age %in% c(18:25), 1, 0)
repdata$AG2 <- ifelse(repdata$age %in% c(26:35), 1, 0)
repdata$AG3 <- ifelse(repdata$age %in% c(36:45), 1, 0)
repdata$AG4 <- ifelse(repdata$age %in% c(46:55), 1, 0)
repdata$AG5 <- ifelse(repdata$age %in% c(56:65), 1, 0)
repdata$AG6 <- ifelse(repdata$age >65, 1, 0)
repdata$R1 <- ifelse(repdata$region.char == "Andalucía", 1, 0)
repdata$R2 <- ifelse(repdata$region.char == "Aragón", 1, 0)
repdata$R3 <- ifelse(repdata$region.char == "Asturias", 1, 0)
repdata$R4 <- ifelse(repdata$region.char == "las Islas Baleares", 1, 0)
repdata$R5 <- ifelse(repdata$region.char == "las Islas Canarias", 1, 0)
repdata$R6 <- ifelse(repdata$region.char == "Cantabria", 1, 0)
repdata$R7 <- ifelse(repdata$region.char == "Castilla y León", 1, 0)
repdata$R8 <- ifelse(repdata$region.char == "Castilla-La Mancha", 1, 0)
repdata$R9 <- ifelse(repdata$region.char == "Catalunya", 1, 0)
repdata$R10 <- ifelse(repdata$region.char == "Valencia", 1, 0)
repdata$R11 <- ifelse(repdata$region.char == "Extremadura", 1, 0)
repdata$R12 <- ifelse(repdata$region.char == "Galicia", 1, 0)
repdata$R13 <- ifelse(repdata$region.char == "Madrid", 1, 0)
repdata$R14 <- ifelse(repdata$region.char == "Murcia", 1, 0)
repdata$R15 <- ifelse(repdata$region.char == "Navarra", 1, 0)
repdata$R16 <- ifelse(repdata$region.char == "País Vasco", 1, 0)
repdata$R17 <- ifelse(repdata$region.char == "La Rioja", 1, 0)

#create dataframe to store results
rep.tab <- data.frame(
  row.names = c("Male", "18-25", "26-35", "36-45", "46-55", "56-65", "66+",
                "Andalucía", "Aragón", "Asturias", "las Islas Baleares", "las Islas Canarias",
                "Cantabria", "Castilla y León", "Castilla-La Mancha", "Catalunya", "Valencia",
                "Extremadura", "Galicia", "Madrid", "Murcia", "Navarra", "País Vasco", "La Rioja"),
  
  sample.prop = rep(NA, 24), 
  
  #Census data from 2023 INE estimates
  census.prop = c(0.490, 0.097, 0.131, 0.169, 0.187, 0.158, 0.256,
                  0.179, 0.028, 0.021, 0.025, 0.046, 0.012, 0.050, 0.043,
                  0.165, 0.109, 0.022, 0.056, 0.143, 0.032, 0.014, 0.046, 0.007),
  difference = rep(NA, 24),
  p.value = rep(NA, 24)
)

# For each demographic indicator, take proportion in data, difference with census, and p-value from t-test
for (i in 1:24){
  rep.tab$sample.prop[i] <- round(mean(repdata[[i+45]], na.rm = T), 3)
  rep.tab$difference[i] <- round(abs(rep.tab$sample.prop[i] - rep.tab$census.prop[i]), 3)
  rep.tab$p.value[i] <- round(t.test(repdata[[i+45]], mu = rep.tab$census.prop[i], alternative = 'two.sided')$p.value, 3)
}


#print latex friendly table - TABLE A1
kbl(rep.tab, format = 'html', booktabs = T)


#### A.2 Balance ####
# For age, gender, education, income, political knowledge, Spanish identity, 
# regional identity, and ideology:

# run t-test to check for differences between treated group(s) and control
# run anova test for differences across the 4 groups
# store vector with 

tInfo <- t.test(age~anytreat, data = data)
anova_p <- as.numeric(anova(lm(age ~ factor(treat), data = data))[1,5])
Age <- c(range(na.omit(data$age)), 
         as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]),
         as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(gender~anytreat, data = data)
anova_p <- as.numeric(anova(lm(gender ~ factor(treat), data = data))[1,5])
Gender <- c(range(na.omit(data$gender)), 
            as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]), 
            as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(education~anytreat, data = data)
anova_p <- as.numeric(anova(lm(education ~ factor(treat), data = data))[1,5])
Education <- c(range(na.omit(data$education)), 
               as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]), 
               as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(income~anytreat, data = data)
anova_p <- as.numeric(anova(lm(income ~ factor(treat), data = data))[1,5])
Income <- c(range(na.omit(data$income)),
            as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]), 
            as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(pol.know~anytreat, data = data)
anova_p <- as.numeric(anova(lm(pol.know ~ factor(treat), data = data))[1,5])
Political_Knowledge <- c(range(na.omit(data$pol.know)), 
                         as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]),
                         as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(SP.nationalism~anytreat, data = data)
anova_p <- as.numeric(anova(lm(SP.nationalism ~ factor(treat), data = data))[1,5])
Spanish_Nationalism <- c(range(na.omit(data$SP.nationalism)),
                         as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]),
                         as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(CCAA.nationalism~anytreat, data = data)
anova_p <- as.numeric(anova(lm(CCAA.nationalism ~ factor(treat), data = data))[1,5])
Regional_Identity <- c(range(na.omit(data$CCAA.nationalism)), 
                       as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]), 
                       as.numeric(tInfo$p.value), anova_p)

tInfo <- t.test(ideology~anytreat, data = data)
anova_p <- as.numeric(anova(lm(ideology ~ factor(treat), data = data))[1,5])
Ideology <- c(range(na.omit(data$ideology)), 
              as.numeric(tInfo$estimate[2]), as.numeric(tInfo$estimate[1]), 
              as.numeric(tInfo$p.value), anova_p)


#Combine, re-name
balance_test <- as.data.frame(rbind(Age, Gender, Education, Income, Political_Knowledge, 
                                    Spanish_Nationalism, Regional_Identity, Ideology))
names(balance_test) <- c("Low", "High", "Mean Treated", "Mean Control", "P-value", "ANOVA p-value")

#round to 2 digits and print in latex friendly form
balance_test[,3:6] <- round(balance_test[,3:6], 2)

#TABLE A2
kbl(balance_test, format = "html", booktabs = T)



#### A.3 Post-intervention attrition ####

#reported in appendix, attrition section (were did we remove data?)
full <- sbt(df_incompletes, df_incompletes$quality.check == 7) # remove 2817
full <- sbt(full, full$duration< 1800) #remove 53
full <- sbt(full, full$duration> 90) #remove 34


#Save only the relevant, ordered data
names(full)
dfAT <- as.data.frame(full[, c(3:17, 23:39)])

#FIGURE A1 - render at both at 850 x 500
plot_attrition(dfAT, treatment_q = 'treat.label', freq = F, total = FALSE, title = "Full Y-axis",
               mycolors = c(control = "darkred", elite = "darkgreen", peer = "darkblue", media = "purple")) 

plot_attrition(dfAT, treatment_q = 'treat.label', freq = F, total = FALSE, title = "Constrained Y-axis",
               mycolors = c(control = "darkred", elite = "darkgreen", peer = "darkblue", media = "purple")) +
  scale_y_continuous(limits = c(0, 0.1), breaks = seq(0, 0.1, 0.01))





#### A.4 Factual Manipulation Check ####

prop.table(table(data[data$treat != 0, ]$manip)) #About 69% of treated respondents correctly ID'ed treatment as stigmatizing

#Subset data for only `successfully' treated, get plots
data$success <- ifelse(data$treat > 0, ifelse(data$manip == 1, 1, 0), 1)
ensured <- sbt(data, data$success == 1)

#FIGURE A2
get_plot(validation_coefs(ensured, controls = F), mytitle = "A. Second-Order Empirical Stigma")
get_plot(reg_coefs(ensured, controls = F), mytitle = "B. Generalized Stigma (Indexed)")

#FIGURE A3
get_plot(reg_coefs(ensured, outcome = "first.accept", controls = F),
         mytitle = "A. First order normative evaluations", range = 1.5)
get_plot(reg_coefs(ensured, outcome = "second.accept", controls = F), 
         mytitle = "B. Second order normative evaluations (indexed)", range = 1.5)
get_plot(reg_coefs(ensured, outcome = "first.sanction.index", controls = F), 
         mytitle = "C. Willingness to sanction (1st order)", range = 1.5)
get_plot(reg_coefs(ensured, outcome = "second.sanction.index", controls = F), 
         mytitle = "D. Sanctioning expectations (2nd order)", range = 1.5)

#FIGURE A4
get_paired(ensured, controls = F) + labs(title = "C. Treatment group differences") +
  theme(plot.title = element_text(size = 16, face = 'bold', hjust = 0.5))



#### A.5 Descriptive data ####

#Reshape dataset to stack variables on themselves for distribution
outcome_dist <- data.frame(
  var = c(rep("Index", nrow(sbt(data, data$treat == 0))), 
          rep("1st order\nun-acceptability", nrow(sbt(data, data$treat == 0))),
          rep("2nd order\nun-acceptability", nrow(sbt(data, data$treat == 0))), 
          rep("1st order\nwillingness to\nsanctioning", nrow(sbt(data, data$treat == 0))),
          rep("2nd order\nwillingness to\nsanctioning", nrow(sbt(data, data$treat == 0)))),
  responses = c(sbt(data, data$treat == 0)$gen.stigma, sbt(data, data$treat == 0)$first.accept, sbt(data, data$treat == 0)$second.accept,
                sbt(data, data$treat == 0)$first.sanction.index, sbt(data, data$treat == 0)$second.sanction.index))

#FIGURE A5
ggplot(data = outcome_dist) +
  geom_density_ridges(aes(x = responses, y = var, fill = var), alpha = 0.6, scale = 1) +
  scale_fill_manual(values = c("#561D25", "#CE8147", "#ECDD7B", "#D3E298", "#CDE7BE")) +
  scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, 2)) +
  labs( fill = "Outcome", x = "Responses (higher values = higher stigma)",
        title = "Stigma distributions among control group") +
  theme_bw() +
  theme(axis.title.x = element_text(size = 14, face = 'bold'), axis.text = element_text(size = 14),
        axis.title.y = element_blank(), plot.title = element_text(size = 16, face = 'bold', hjust = 0.5),
        panel.grid = element_blank(), legend.position = "none")+
  scale_y_discrete(limits = rev)


#TABLE A3 - Means and Standard Deviations
paste("Generalized Index", 
  round(mean(sbt(data, treat == 0)$gen.stigma, na.rm = T),2), round(sd(sbt(data, treat == 0)$gen.stigma, na.rm = T),2),
  round(mean(data$gen.stigma, na.rm = T),2), round(sd(data$gen.stigma, na.rm = T),2), sep = " & ")

paste("1st order (un)acceptability",
  round(mean(sbt(data, treat == 0)$first.accept, na.rm = T),2), round(sd(sbt(data, treat == 0)$first.accept, na.rm = T),2),
  round(mean(data$first.accept, na.rm = T),2), round(sd(data$first.accept, na.rm = T),2), sep = " & ")

paste("1st order sanctioning",
  round(mean(sbt(data, treat == 0)$first.sanction.index, na.rm = T),2), round(sd(sbt(data, treat == 0)$first.sanction.index, na.rm = T),2),
  round(mean(data$first.sanction.index, na.rm = T),2), round(sd(data$first.sanction.index, na.rm = T),2), sep = " & ")

paste("2nd order (un)acceptability", 
  round(mean(sbt(data, treat == 0)$second.accept, na.rm = T),2), round(sd(sbt(data, treat == 0)$second.accept, na.rm = T),2),
  round(mean(data$second.accept, na.rm = T),2), round(sd(data$second.accept, na.rm = T),2), sep = " & ")

paste("2nd order sanctioning",
  round(mean(sbt(data, treat == 0)$second.sanction.index, na.rm = T),2), round(sd(sbt(data, treat == 0)$second.sanction.index, na.rm = T),2),
  round(mean(data$second.sanction.index, na.rm = T),2), round(sd(data$second.sanction.index, na.rm = T),2), sep = " & ")






#### A.6 Including models with controls ####

#FIGURE A6
get_plot(validation_coefs(data, controls = T), mytitle = "A. Second-Order Empirical Stigma")
get_plot(reg_coefs(data, controls = T), mytitle = "B. Generalized Stigma (Indexed)")


#FIGURE A7
get_plot(reg_coefs(data, outcome = "first.accept", controls = T), 
         mytitle = "A. First order normative evaluations")
get_plot(reg_coefs(data, outcome = "second.accept", controls = T),
         mytitle = "B. Second order normative evaluations (indexed)")
get_plot(reg_coefs(data, outcome = "first.sanction.index", controls = T),
         mytitle = "C. Willingness to sanction (1st order)")
get_plot(reg_coefs(data, outcome = "second.sanction.index", controls = T), 
         mytitle = "D. Sanctioning expectations (2nd order)")

#FIGURE A8
get_paired(data, controls = T) + labs(title = "C. Treatment group differences") +
  theme(plot.title = element_text(size = 16, face = 'bold', hjust = 0.5)) 



#### A.7 Checking index validity ####
### A.7.1 Correlation matrix - TABLE A4
round(cor(data[data$treat == 0, c("first.accept", "second.accept", "first.sanction.index", 
                                  "second.sanction.index")], use = 'complete.obs'), 2)

### A.7.2 PCA
# takeaway; it looks like there are two components, and that mainly the acceptability 
#           and sanctioning look like they move together, respectively

#run pca commands on indexed outcomes
pca <- data[,c("first.accept", "second.accept", "first.sanction.index", "second.sanction.index")]
pca <- as.data.frame(scale(na.omit(pca))) #omit NAs and scale variables
data.pca <- princomp(pca) 
summary(data.pca)# first 2 components cumulatively explain 73% of variance
data.pca$loadings[, 1:2] #opposite sign loadings onto comp 2 by acceptability vs. sanctioning

# FIGURE A9
fviz_pca_var(data.pca, geom = c("point", "text"), xlim = c(-1, 1), ylim = c(-1, 1))



#### A.8 Alternative specifications for conditional effects ####

#Create binary left-right cutoff
data$RL2 <- ifelse(data$ideology > 4, "right", "left")

#Run regressions to examine significance of interaction terms
m1 <- summary(lm(gen.stigma ~ anytreat + factor(RL2) + anytreat*factor(RL2), data = sbt(data, treat %in% c(0,1))))
m2 <- summary(lm(gen.stigma ~ anytreat + factor(RL2) + anytreat*factor(RL2), data = sbt(data, treat %in% c(0,2))))
m3 <- summary(lm(gen.stigma ~ anytreat + factor(RL2) + anytreat*factor(RL2), data = sbt(data, treat %in% c(0,3))))

#TABLE A5
modelsummary(list(m1, m2, m3),  stars = T)

#Create 3-level ideology cutoff
data$RL3 <- ifelse(data$ideology > 6, "right", ifelse(data$ideology > 4, "center", "left"))

#Run regressions to examine significance of interaction terms
m1 <- summary(lm(gen.stigma ~ anytreat + factor(RL3) + anytreat*factor(RL3), data = sbt(data, treat %in% c(0,1))))
m2 <- summary(lm(gen.stigma ~ anytreat + factor(RL3) + anytreat*factor(RL3), data = sbt(data, treat %in% c(0,2))))
m3 <- summary(lm(gen.stigma ~ anytreat + factor(RL3) + anytreat*factor(RL3), data = sbt(data, treat %in% c(0,3))))

m4 <- summary(lm(gen.stigma ~ anytreat + ideology + anytreat*ideology, data = sbt(data, treat %in% c(0,1))))
m5 <- summary(lm(gen.stigma ~ anytreat + ideology + anytreat*ideology, data = sbt(data, treat %in% c(0,2))))
m6 <- summary(lm(gen.stigma ~ anytreat + ideology + anytreat*ideology, data = sbt(data, treat %in% c(0,3))))

#TABLE A6
modelsummary(list(m1, m4, m2, m5, m3, m6), stars = T)


#### A.9 Effects on support for Vox ####

#FIGURE A10
get_plot(reg_coefs(data, outcome = "vox.support"), mytitle = "Consider voting Vox in the Future") +
  labs(x = "Treatment effect (5 pt. scale, positive values = more likely to vote Vox)")
